Read data

# Load the life-expectancy dataset from the serialized RDS file.
data <- readRDS(file = "life_expectancy_data.RDS")

Plotly

## No scatter mode specifed:
##   Setting the mode to markers
##   Read more about this attribute -> https://plotly.com/r/reference/#scatter-mode

Life expectancy: Americas vs Africa

# stat.test <- data %>% filter(continent %in% c('Africa', 'Americas')) %>%t_test(`Life expectancy` ~ continent) %>% add_xy_position(x = "continent") 
# stat.test

# data %>% filter(continent %in% c('Africa', 'Americas')) %>% ggboxplot(x = "continent", y = "Life expectancy", ylab = "Life expectancy", xlab = "continent", add = "jitter") +  labs(subtitle = get_test_label(stat.test, detailed = TRUE)) + stat_pvalue_manual(stat.test, tip.length = 0) 

Corrplot

# Drop the Year column, then keep only the numeric columns.
new_data <- data %>%
  select(!Year) %>%
  select(where(is.numeric))

# Pairwise correlation matrix of the retained columns (cor() defaults).
data_cor <- cor(new_data)
data_cor
##                                         Life expectancy Unemployment
## Life expectancy                               1.0000000 -0.122453828
## Unemployment                                 -0.1224538  1.000000000
## Infant Mortality                             -0.8309072  0.103775270
## GDP                                           0.1688143 -0.111437568
## GNI                                           0.1786383 -0.109486971
## Clean fuels and cooking technologies          0.7637583  0.063975082
## Per Capita                                    0.6034817 -0.183778304
## Mortality caused by road traffic injury      -0.6518097  0.173386776
## Tuberculosis Incidence                       -0.5831370  0.121480015
## DPT Immunization                              0.5044753 -0.147098313
## HepB3 Immunization                            0.4023880 -0.102304627
## Measles Immunization                          0.5323483 -0.166019661
## Hospital beds                                 0.4849944 -0.147215966
## Basic sanitation services                     0.8515922  0.029209789
## Tuberculosis treatment                       -0.3214166 -0.051270213
## Urban population                              0.5829745  0.085895193
## Rural population                             -0.5829745 -0.085895193
## Non-communicable Mortality                   -0.6338148  0.131242386
## Sucide Rate                                   0.1593575  0.006558983
##                                         Infant Mortality         GDP
## Life expectancy                              -0.83090718  0.16881430
## Unemployment                                  0.10377527 -0.11143757
## Infant Mortality                              1.00000000 -0.16907590
## GDP                                          -0.16907590  1.00000000
## GNI                                          -0.16096299  0.99435959
## Clean fuels and cooking technologies         -0.77567408  0.13751753
## Per Capita                                   -0.31858469  0.17389277
## Mortality caused by road traffic injury       0.65050858 -0.11544108
## Tuberculosis Incidence                        0.56359507 -0.08695088
## DPT Immunization                             -0.59029923  0.10813790
## HepB3 Immunization                           -0.52710787  0.08719372
## Measles Immunization                         -0.58504641  0.10139187
## Hospital beds                                -0.52085961  0.13217297
## Basic sanitation services                    -0.77056506  0.14600318
## Tuberculosis treatment                        0.27989549 -0.02495462
## Urban population                             -0.47460671  0.15210771
## Rural population                              0.47460671 -0.15210771
## Non-communicable Mortality                    0.66661171 -0.19176139
## Sucide Rate                                   0.05980403  0.11409037
##                                                 GNI
## Life expectancy                          0.17863826
## Unemployment                            -0.10948697
## Infant Mortality                        -0.16096299
## GDP                                      0.99435959
## GNI                                      1.00000000
## Clean fuels and cooking technologies     0.13470096
## Per Capita                               0.18266349
## Mortality caused by road traffic injury -0.11353833
## Tuberculosis Incidence                  -0.08693997
## DPT Immunization                         0.10537313
## HepB3 Immunization                       0.08210454
## Measles Immunization                     0.09835173
## Hospital beds                            0.13351160
## Basic sanitation services                0.15141620
## Tuberculosis treatment                  -0.02115128
## Urban population                         0.16783616
## Rural population                        -0.16783616
## Non-communicable Mortality              -0.18266265
## Sucide Rate                              0.13051116
##                                         Clean fuels and cooking technologies
## Life expectancy                                                   0.76375825
## Unemployment                                                      0.06397508
## Infant Mortality                                                 -0.77567408
## GDP                                                               0.13751753
## GNI                                                               0.13470096
## Clean fuels and cooking technologies                              1.00000000
## Per Capita                                                        0.38753491
## Mortality caused by road traffic injury                          -0.59553317
## Tuberculosis Incidence                                           -0.54553537
## DPT Immunization                                                  0.44831815
## HepB3 Immunization                                                0.38243439
## Measles Immunization                                              0.49991332
## Hospital beds                                                     0.43564106
## Basic sanitation services                                         0.84316638
## Tuberculosis treatment                                           -0.31357924
## Urban population                                                  0.59435666
## Rural population                                                 -0.59435666
## Non-communicable Mortality                                       -0.65526759
## Sucide Rate                                                       0.00976739
##                                          Per Capita
## Life expectancy                          0.60348166
## Unemployment                            -0.18377830
## Infant Mortality                        -0.31858469
## GDP                                      0.17389277
## GNI                                      0.18266349
## Clean fuels and cooking technologies     0.38753491
## Per Capita                               1.00000000
## Mortality caused by road traffic injury -0.41639961
## Tuberculosis Incidence                  -0.30760893
## DPT Immunization                         0.21421912
## HepB3 Immunization                       0.09303434
## Measles Immunization                     0.21571852
## Hospital beds                            0.24966424
## Basic sanitation services                0.45409579
## Tuberculosis treatment                  -0.32644726
## Urban population                         0.42880232
## Rural population                        -0.42880232
## Non-communicable Mortality              -0.35531918
## Sucide Rate                              0.32281997
##                                         Mortality caused by road traffic injury
## Life expectancy                                                      -0.6518097
## Unemployment                                                          0.1733868
## Infant Mortality                                                      0.6505086
## GDP                                                                  -0.1154411
## GNI                                                                  -0.1135383
## Clean fuels and cooking technologies                                 -0.5955332
## Per Capita                                                           -0.4163996
## Mortality caused by road traffic injury                               1.0000000
## Tuberculosis Incidence                                                0.4123296
## DPT Immunization                                                     -0.3400658
## HepB3 Immunization                                                   -0.2628041
## Measles Immunization                                                 -0.3107449
## Hospital beds                                                        -0.4909601
## Basic sanitation services                                            -0.6320693
## Tuberculosis treatment                                                0.3130249
## Urban population                                                     -0.3718674
## Rural population                                                      0.3718674
## Non-communicable Mortality                                            0.4071462
## Sucide Rate                                                          -0.1102582
##                                         Tuberculosis Incidence DPT Immunization
## Life expectancy                                    -0.58313705       0.50447529
## Unemployment                                        0.12148001      -0.14709831
## Infant Mortality                                    0.56359507      -0.59029923
## GDP                                                -0.08695088       0.10813790
## GNI                                                -0.08693997       0.10537313
## Clean fuels and cooking technologies               -0.54553537       0.44831815
## Per Capita                                         -0.30760893       0.21421912
## Mortality caused by road traffic injury             0.41232959      -0.34006575
## Tuberculosis Incidence                              1.00000000      -0.37169763
## DPT Immunization                                   -0.37169763       1.00000000
## HepB3 Immunization                                 -0.31215616       0.94776877
## Measles Immunization                               -0.37364785       0.88078924
## Hospital beds                                      -0.19543396       0.32366629
## Basic sanitation services                          -0.55532307       0.45942955
## Tuberculosis treatment                              0.23672979      -0.13993470
## Urban population                                   -0.33622933       0.22057595
## Rural population                                    0.33622933      -0.22057595
## Non-communicable Mortality                          0.48089925      -0.38159200
## Sucide Rate                                         0.09858654       0.05567581
##                                         HepB3 Immunization Measles Immunization
## Life expectancy                                 0.40238797           0.53234834
## Unemployment                                   -0.10230463          -0.16601966
## Infant Mortality                               -0.52710787          -0.58504641
## GDP                                             0.08719372           0.10139187
## GNI                                             0.08210454           0.09835173
## Clean fuels and cooking technologies            0.38243439           0.49991332
## Per Capita                                      0.09303434           0.21571852
## Mortality caused by road traffic injury        -0.26280410          -0.31074490
## Tuberculosis Incidence                         -0.31215616          -0.37364785
## DPT Immunization                                0.94776877           0.88078924
## HepB3 Immunization                              1.00000000           0.86161432
## Measles Immunization                            0.86161432           1.00000000
## Hospital beds                                   0.27225503           0.33526203
## Basic sanitation services                       0.38112985           0.50904494
## Tuberculosis treatment                         -0.09250053          -0.14092951
## Urban population                                0.13692089           0.24604275
## Rural population                               -0.13692089          -0.24604275
## Non-communicable Mortality                     -0.31401541          -0.38626279
## Sucide Rate                                    -0.01978305           0.02560727
##                                         Hospital beds Basic sanitation services
## Life expectancy                             0.4849944                0.85159219
## Unemployment                               -0.1472160                0.02920979
## Infant Mortality                           -0.5208596               -0.77056506
## GDP                                         0.1321730                0.14600318
## GNI                                         0.1335116                0.15141620
## Clean fuels and cooking technologies        0.4356411                0.84316638
## Per Capita                                  0.2496642                0.45409579
## Mortality caused by road traffic injury    -0.4909601               -0.63206935
## Tuberculosis Incidence                     -0.1954340               -0.55532307
## DPT Immunization                            0.3236663                0.45942955
## HepB3 Immunization                          0.2722550                0.38112985
## Measles Immunization                        0.3352620                0.50904494
## Hospital beds                               1.0000000                0.47445249
## Basic sanitation services                   0.4744525                1.00000000
## Tuberculosis treatment                     -0.1947393               -0.30065649
## Urban population                            0.2740715                0.55069603
## Rural population                           -0.2740715               -0.55069603
## Non-communicable Mortality                 -0.3562093               -0.52254411
## Sucide Rate                                 0.2665261                0.15953741
##                                         Tuberculosis treatment Urban population
## Life expectancy                                    -0.32141658       0.58297452
## Unemployment                                       -0.05127021       0.08589519
## Infant Mortality                                    0.27989549      -0.47460671
## GDP                                                -0.02495462       0.15210771
## GNI                                                -0.02115128       0.16783616
## Clean fuels and cooking technologies               -0.31357924       0.59435666
## Per Capita                                         -0.32644726       0.42880232
## Mortality caused by road traffic injury             0.31302487      -0.37186744
## Tuberculosis Incidence                              0.23672979      -0.33622933
## DPT Immunization                                   -0.13993470       0.22057595
## HepB3 Immunization                                 -0.09250053       0.13692089
## Measles Immunization                               -0.14092951       0.24604275
## Hospital beds                                      -0.19473929       0.27407149
## Basic sanitation services                          -0.30065649       0.55069603
## Tuberculosis treatment                              1.00000000      -0.28393086
## Urban population                                   -0.28393086       1.00000000
## Rural population                                    0.28393086      -1.00000000
## Non-communicable Mortality                          0.26680379      -0.53028884
## Sucide Rate                                        -0.07289482       0.08936862
##                                         Rural population
## Life expectancy                              -0.58297452
## Unemployment                                 -0.08589519
## Infant Mortality                              0.47460671
## GDP                                          -0.15210771
## GNI                                          -0.16783616
## Clean fuels and cooking technologies         -0.59435666
## Per Capita                                   -0.42880232
## Mortality caused by road traffic injury       0.37186744
## Tuberculosis Incidence                        0.33622933
## DPT Immunization                             -0.22057595
## HepB3 Immunization                           -0.13692089
## Measles Immunization                         -0.24604275
## Hospital beds                                -0.27407149
## Basic sanitation services                    -0.55069603
## Tuberculosis treatment                        0.28393086
## Urban population                             -1.00000000
## Rural population                              1.00000000
## Non-communicable Mortality                    0.53028884
## Sucide Rate                                  -0.08936862
##                                         Non-communicable Mortality  Sucide Rate
## Life expectancy                                         -0.6338148  0.159357534
## Unemployment                                             0.1312424  0.006558983
## Infant Mortality                                         0.6666117  0.059804035
## GDP                                                     -0.1917614  0.114090369
## GNI                                                     -0.1826627  0.130511162
## Clean fuels and cooking technologies                    -0.6552676  0.009767390
## Per Capita                                              -0.3553192  0.322819969
## Mortality caused by road traffic injury                  0.4071462 -0.110258162
## Tuberculosis Incidence                                   0.4808992  0.098586543
## DPT Immunization                                        -0.3815920  0.055675815
## HepB3 Immunization                                      -0.3140154 -0.019783046
## Measles Immunization                                    -0.3862628  0.025607272
## Hospital beds                                           -0.3562093  0.266526138
## Basic sanitation services                               -0.5225441  0.159537407
## Tuberculosis treatment                                   0.2668038 -0.072894819
## Urban population                                        -0.5302888  0.089368619
## Rural population                                         0.5302888 -0.089368619
## Non-communicable Mortality                               1.0000000  0.184023972
## Sucide Rate                                              0.1840240  1.000000000
# Correlogram of the full correlation matrix with default settings.
corrplot(corr = data_cor)

# Lower-triangle colour map without the diagonal; coefficients are printed
# in each cell and the colour legend is placed at the bottom.
corrplot(
  corr = data_cor,
  method = "color",
  type = "lower",
  diag = FALSE,
  addCoef.col = "grey30",
  tl.col = "grey10",
  cl.pos = "b",
  col = COL2("RdBu", 10)
)

Hierarchical clustering

# Standardise every column with scale() (default centring and scaling).
new_data_scaled <- scale(new_data)

# Pairwise Euclidean distances between the rows of the scaled matrix.
new_data_dist <- dist(new_data_scaled, method = "euclidean")

# Preview the top-left 7 x 7 corner of the distance matrix.
as.matrix(new_data_dist)[seq_len(7), seq_len(7)]
##          1        2        3        4        5        6        7
## 1 0.000000 7.605708 6.331840 4.414874 6.645623 7.923487 6.871952
## 2 7.605708 0.000000 2.624659 7.921597 3.357361 3.631018 2.133443
## 3 6.331840 2.624659 0.000000 6.321666 4.350331 3.464837 1.838549
## 4 4.414874 7.921597 6.321666 0.000000 8.095849 7.161240 7.283456
## 5 6.645623 3.357361 4.350331 8.095849 0.000000 4.966244 3.718748
## 6 7.923487 3.631018 3.464837 7.161240 4.966244 0.000000 3.179530
## 7 6.871952 2.133443 1.838549 7.283456 3.718748 3.179530 0.000000
# Hierarchical clustering of the rows on the precomputed distances,
# using the "ward.D2" agglomeration method.
new_data_hc <- hclust(new_data_dist, method = "ward.D2")

# Dendrogram of the clustering; cex = 0.1 keeps the leaf labels very small.
fviz_dend(x = new_data_hc, cex = 0.1)
## Warning: The `<scale>` argument of `guides()` cannot be `FALSE`. Use "none" instead as
## of ggplot2 3.3.4.
## ℹ The deprecated feature was likely used in the factoextra package.
##   Please report the issue at <https://github.com/kassambara/factoextra/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

# Heatmap of the scaled data with dendrograms for rows and columns.
# Rows are clustered on the precomputed distance object and cut into 5
# groups; columns are cut into as many groups as there are column names.
pheatmap(
  mat = new_data_scaled,
  clustering_distance_rows = new_data_dist,
  clustering_method = "ward.D2",
  cutree_rows = 5,
  cutree_cols = length(colnames(new_data_scaled)),
  show_rownames = FALSE,
  angle_col = 45,
  main = "Dendrograms for clustering rows and columns with map"
)

# На основе дендрограммы кластеризации было выделено 5 кластеров; кроме того, мы упорядочили колонки так, чтобы похожие колонки были ближе друг к другу. Красным отмечены более высокие значения, а голубым — более низкие. Так, столбцы GDP и GNI в 3 кластере включают больше наблюдений.

PCA analysis

# PCA on the matrix that was already standardised with scale(), so prcomp's
# own scaling stays switched off. The argument is spelled out in full
# (`scale.`) with a literal FALSE instead of relying on partial argument
# matching and the reassignable shorthand `F`.
new_data_pca <- prcomp(new_data_scaled, scale. = FALSE)

# Loadings matrix: one row per variable, one column per principal component.
new_data_pca$rotation
##                                                 PC1          PC2         PC3
## Life expectancy                          0.33006689  0.057204617 -0.05177228
## Unemployment                            -0.04360407  0.118385016 -0.21744626
## Infant Mortality                        -0.31955172  0.087671945  0.03648213
## GDP                                      0.09276553  0.219603887  0.62719313
## GNI                                      0.09354118  0.228616655  0.62558678
## Clean fuels and cooking technologies     0.31191706  0.050540081 -0.12350100
## Per Capita                               0.20074534  0.231091189  0.01931646
## Mortality caused by road traffic injury -0.25349810 -0.058468218  0.05350435
## Tuberculosis Incidence                  -0.23013005  0.040892548  0.08082447
## DPT Immunization                         0.24212402 -0.427377944  0.12699330
## HepB3 Immunization                       0.20937238 -0.477929780  0.13269426
## Measles Immunization                     0.24602541 -0.401989482  0.10628068
## Hospital beds                            0.20468586  0.002991494  0.05053434
## Basic sanitation services                0.31411211  0.053310140 -0.08827323
## Tuberculosis treatment                  -0.14131661 -0.138134329  0.14121743
## Urban population                         0.24534367  0.308059080 -0.14538934
## Rural population                        -0.24534367 -0.308059080  0.14538934
## Non-communicable Mortality              -0.26355324 -0.049510722  0.04557013
## Sucide Rate                              0.03878119  0.163212116  0.13886389
##                                                  PC4         PC5          PC6
## Life expectancy                          0.089513675  0.08036646  0.018565760
## Unemployment                            -0.216386935 -0.61561243  0.494582801
## Infant Mortality                         0.078994634 -0.13004272 -0.218691072
## GDP                                     -0.177142715 -0.02121091  0.103010691
## GNI                                     -0.168469681 -0.03399209  0.090623806
## Clean fuels and cooking technologies    -0.107962685 -0.01481436  0.210740240
## Per Capita                               0.317879743  0.07476950 -0.391358563
## Mortality caused by road traffic injury -0.209266232 -0.26295907 -0.251606180
## Tuberculosis Incidence                   0.156467649 -0.27269069  0.006312097
## DPT Immunization                         0.026072879 -0.21158797 -0.144538424
## HepB3 Immunization                      -0.029261698 -0.23706435 -0.096100139
## Measles Immunization                     0.007926382 -0.18891535 -0.144925604
## Hospital beds                            0.335284926  0.06428811  0.349644874
## Basic sanitation services                0.057088341 -0.02959889  0.230295348
## Tuberculosis treatment                  -0.118308900 -0.01200118 -0.005160191
## Urban population                        -0.176581127 -0.26016879 -0.310930274
## Rural population                         0.176581127  0.26016879  0.310930274
## Non-communicable Mortality               0.276485055 -0.20390510  0.085450283
## Sucide Rate                              0.654299331 -0.35772127 -0.019335506
##                                                 PC7           PC8          PC9
## Life expectancy                         -0.02966855 -1.874076e-01 -0.126804914
## Unemployment                             0.19219695 -1.710033e-01 -0.155292293
## Infant Mortality                         0.07090199 -2.653059e-02  0.070234452
## GDP                                      0.08456071 -6.505088e-05 -0.005728914
## GNI                                      0.07444439 -9.965249e-03  0.005427434
## Clean fuels and cooking technologies    -0.04081980 -9.629118e-02 -0.087330164
## Per Capita                               0.19432111 -2.534085e-01 -0.450249421
## Mortality caused by road traffic injury  0.01924042 -3.314449e-02  0.278670279
## Tuberculosis Incidence                  -0.19254401  4.856493e-01 -0.671873972
## DPT Immunization                         0.06274235  2.856350e-02 -0.028027646
## HepB3 Immunization                       0.06715318  5.903168e-02 -0.029719944
## Measles Immunization                     0.02887483  1.889758e-02 -0.014429381
## Hospital beds                           -0.31296677  4.855598e-01  0.256789546
## Basic sanitation services               -0.03992494 -2.450440e-01 -0.046696803
## Tuberculosis treatment                  -0.81954312 -4.474203e-01 -0.143488181
## Urban population                        -0.19999539  1.557952e-01  0.128743039
## Rural population                         0.19999539 -1.557952e-01 -0.128743039
## Non-communicable Mortality               0.05562817 -1.989353e-01  0.093895930
## Sucide Rate                             -0.06345358 -1.887176e-01  0.287808040
##                                                 PC10        PC11        PC12
## Life expectancy                          0.084924190  0.13779658  0.11062481
## Unemployment                             0.180440857 -0.33876116 -0.09529601
## Infant Mortality                         0.010372306 -0.14839882  0.03623480
## GDP                                     -0.032110598 -0.00936001  0.01721159
## GNI                                     -0.041802877 -0.01269674  0.03313709
## Clean fuels and cooking technologies     0.048522215  0.32612297  0.07991169
## Per Capita                               0.292347008 -0.37940437  0.20872964
## Mortality caused by road traffic injury  0.598413965  0.29574822  0.40805889
## Tuberculosis Incidence                  -0.023773929  0.32870474  0.04038651
## DPT Immunization                        -0.040977482 -0.11650266 -0.12551303
## HepB3 Immunization                      -0.066250343 -0.12673472 -0.08944260
## Measles Immunization                     0.002942612  0.08431630  0.19760402
## Hospital beds                            0.286894543 -0.37184766  0.29722319
## Basic sanitation services               -0.061260655  0.33026350  0.31360286
## Tuberculosis treatment                   0.057902695 -0.16262514 -0.05263385
## Urban population                        -0.202824477 -0.06229573  0.02564452
## Rural population                         0.202824477  0.06229573 -0.02564452
## Non-communicable Mortality              -0.570189360 -0.08715385  0.57042725
## Sucide Rate                              0.089301394  0.26883371 -0.42298275
##                                                 PC13         PC14          PC15
## Life expectancy                         -0.486516678 -0.230496048  5.396929e-02
## Unemployment                            -0.069205415 -0.102131834 -6.399472e-02
## Infant Mortality                         0.556918161 -0.365005913  3.976385e-01
## GDP                                      0.012961176  0.004560391 -1.697393e-02
## GNI                                     -0.003176222 -0.020821126  1.428525e-02
## Clean fuels and cooking technologies     0.597852821  0.425259244 -2.562549e-01
## Per Capita                               0.083406683  0.159042542 -6.939584e-02
## Mortality caused by road traffic injury -0.156075552  0.191616473  2.648679e-02
## Tuberculosis Incidence                  -0.052974407  0.002024872  4.593249e-02
## DPT Immunization                        -0.031864590  0.216906672  2.181899e-01
## HepB3 Immunization                      -0.005039022  0.252346213  2.906669e-01
## Measles Immunization                     0.177399009 -0.595548459 -5.083456e-01
## Hospital beds                            0.064929402  0.006696123  3.960996e-02
## Basic sanitation services                0.081058047 -0.209588283  5.819012e-01
## Tuberculosis treatment                   0.033323536  0.004102715 -2.076603e-02
## Urban population                        -0.031452067  0.009692941  4.490951e-05
## Rural population                         0.031452067 -0.009692941 -4.490951e-05
## Non-communicable Mortality              -0.100710172  0.222548004 -1.475618e-01
## Sucide Rate                             -0.009840195  0.028046006 -9.053576e-02
##                                                 PC16          PC17
## Life expectancy                         -0.696266932  0.0010833200
## Unemployment                             0.002664070  0.0191440029
## Infant Mortality                        -0.423704229  0.0385194485
## GDP                                      0.023394739 -0.0017777024
## GNI                                     -0.048111465  0.0115060301
## Clean fuels and cooking technologies    -0.300844342  0.0215577438
## Per Capita                               0.170368470 -0.0474112984
## Mortality caused by road traffic injury  0.008712805  0.0119227383
## Tuberculosis Incidence                  -0.014695088  0.0151111488
## DPT Immunization                        -0.038820748  0.7365137990
## HepB3 Immunization                      -0.102859898 -0.6691391704
## Measles Immunization                     0.129043569 -0.0314726643
## Hospital beds                           -0.046348258  0.0129550977
## Basic sanitation services                0.411576630  0.0055180110
## Tuberculosis treatment                   0.016428620 -0.0008812946
## Urban population                         0.020884322 -0.0104448446
## Rural population                        -0.020884322  0.0104448446
## Non-communicable Mortality              -0.093980507  0.0254033161
## Sucide Rate                              0.052305726 -0.0517403022
##                                                  PC18          PC19
## Life expectancy                         -0.0473220348 -2.142886e-16
## Unemployment                             0.0007794072 -2.848376e-17
## Infant Mortality                        -0.0356819468 -6.740917e-17
## GDP                                     -0.7055006478  1.278685e-15
## GNI                                      0.7057642782 -1.449871e-15
## Clean fuels and cooking technologies     0.0063834316 -7.245161e-16
## Per Capita                               0.0100840283 -2.647930e-16
## Mortality caused by road traffic injury -0.0034193690  3.632334e-18
## Tuberculosis Incidence                   0.0016041278 -6.911827e-17
## DPT Immunization                        -0.0090687898  1.068521e-16
## HepB3 Immunization                       0.0007918953  8.727128e-17
## Measles Immunization                     0.0073766000 -1.092427e-16
## Hospital beds                           -0.0040983059 -4.687964e-17
## Basic sanitation services                0.0010444312  3.057854e-16
## Tuberculosis treatment                  -0.0038228266  4.529913e-17
## Urban population                        -0.0084857085  7.071068e-01
## Rural population                         0.0084857085  7.071068e-01
## Non-communicable Mortality              -0.0134625522 -8.268647e-17
## Sucide Rate                             -0.0008333885  6.735392e-17

PC1, PC2, …, PC19 — это главные компоненты, и каждый столбец отражает вклад каждой переменной в данную компоненту. Значения в каждой строке представляют собой вес или вклад соответствующей переменной в соответствующую главную компоненту. Положительные и отрицательные значения указывают на направление влияния. Переменные с более высокими абсолютными значениями вносят больший вклад в главную компоненту.

Biplot

# PCA biplot with observations grouped (and coloured) by continent and an
# ellipse drawn per group. TRUE is written out because the shorthand `T`
# is an ordinary variable that can be reassigned.
ggbiplot(
  new_data_pca,
  groups = as.factor(data$continent),
  ellipse = TRUE,
  scale = 0,
  alpha = 0.1
) +
  theme_minimal()

Дайте содержательную интерпретацию PCA анализу.

# Standard deviation, proportion of variance and cumulative proportion
# for each principal component.
summary(new_data_pca)
## Importance of components:
##                           PC1    PC2    PC3     PC4     PC5     PC6    PC7
## Standard deviation     2.7526 1.4841 1.3952 1.17177 1.08375 0.96347 0.9288
## Proportion of Variance 0.3988 0.1159 0.1025 0.07227 0.06182 0.04886 0.0454
## Cumulative Proportion  0.3988 0.5147 0.6172 0.68945 0.75126 0.80012 0.8455
##                            PC8     PC9    PC10    PC11    PC12    PC13    PC14
## Standard deviation     0.85740 0.69263 0.68937 0.59106 0.54986 0.47085 0.36596
## Proportion of Variance 0.03869 0.02525 0.02501 0.01839 0.01591 0.01167 0.00705
## Cumulative Proportion  0.88421 0.90946 0.93447 0.95286 0.96877 0.98044 0.98749
##                           PC15    PC16    PC17    PC18      PC19
## Standard deviation     0.34546 0.26941 0.20224 0.06968 8.804e-16
## Proportion of Variance 0.00628 0.00382 0.00215 0.00026 0.000e+00
## Cumulative Proportion  0.99377 0.99759 0.99974 1.00000 1.000e+00
# Contribution of the variables to each of the first three components.
fviz_contrib(X = new_data_pca, choice = "var", axes = 1, top = 24)  # PC1

fviz_contrib(X = new_data_pca, choice = "var", axes = 2, top = 24)  # PC2

fviz_contrib(X = new_data_pca, choice = "var", axes = 3, top = 24)  # PC3

# Variable plot restricted to the top contributors, coloured by contribution.
fviz_pca_var(
  X = new_data_pca,
  col.var = "contrib",
  select.var = list(contrib = 3)  # set the number of variables to show here
)

# Первые 3 компоненты объясняют 61,7% вариации данных.

Сравнение результатов отображения точек между алгоритмами PCA и UMAP.

# UMAP embedding of the numeric indicators: normalise every column, project
# with UMAP, then extract the processed training data.
# UMAP is stochastic, so the seeds are fixed explicitly to make the
# embedding (and the plot built from it) reproducible between runs.
# bake(new_data = NULL) is the current replacement for the superseded juice().
umap_data <- recipe(~ ., data = new_data) %>%
  step_normalize(all_predictors()) %>%
  step_umap(all_predictors(), seed = c(42L, 42L)) %>%
  prep() %>%
  bake(new_data = NULL)

# Scatter plot of the two UMAP coordinates, coloured by continent.
# The y aesthetic has to be the UMAP2 column: the literal `2` used before
# placed every point on a single horizontal line.
# The continent labels are attached as a column (rows of umap_data are
# assumed to be in the same order as rows of data, as the original code
# also assumed) so that aes() does not need `data$...`.
umap_data %>%
  mutate(continent = data$continent) %>%
  ggplot(aes(x = UMAP1, y = UMAP2)) +
  geom_point(aes(color = continent), alpha = 0.7, size = 2) +
  labs(color = NULL)